%{
AUTHOR: Felipe Arteaga
-------------------------------------------------------------------------
PROJECT: Warnings
-------------------------------------------------------------------------
DESCRIPTION:
=========================================================================
%}


% --- Workspace reset and path configuration ------------------------------
% Keep only the variables a parent driver script may have defined, then
% clear the console, close figures and open files, and set UTF-8.
clearvars -except projectDir projectDirData fromMainWarningsPaper
clc;close all;fclose('all');feature('DefaultCharacterSet','UTF-8');

% The paths are taken from the caller only when all three variables exist
% and fromMainWarningsPaper is true; otherwise they are derived from the
% OS user name.
calledFromMain=exist('projectDir','var')==1&&exist('projectDirData','var')==1&&exist('fromMainWarningsPaper','var')==1&&fromMainWarningsPaper;
if(not(calledFromMain))
    pcName=char(java.lang.System.getProperty('user.name'));
    if(strcmp(pcName,'felipe'))
        % PC Felipe
        myDir='/Users/felipe/Dropbox/';
        projectDir=[myDir,'git/warnings/'];
        projectDirData=[myDir,'projects/warnings/'];
        addpath(genpath([myDir,'/myMatlabFunctions/']));
    end
end

% Fail early with an explicit message instead of an "unrecognized variable"
% error further down when neither the caller nor the user-name branch above
% provided the project paths.
if(not(exist('projectDir','var')==1&&exist('projectDirData','var')==1))
    error('warnings:descStats:pathsNotSet',...
        'projectDir and projectDirData are not defined. Define them before running this script or add a branch for this user name.');
end

% When true, the generated tables are also passed to compileLatex.
compileLatexTable=false;

% Output and input folders used below.
dirPlots=[projectDir,'/paper/figuresCL/RDs/'];
dirTable=[projectDir,'/paper/tablesCL/'];
dirData=[projectDirData,'/data/chile/'];


% Year selection: anho==0 pools 2018, 2019 and 2020; a positive value
% selects that single year. anhoStr is the suffix used in output names
% (empty when pooling).
anho=0;
if(anho>0)
    anhoStr=sprintf('%i',anho);
else
    anhoStr='';
end

if(anho==0)
    % Load the per-year RD inputs.
    d1=load([dirData,'/2018/inputRD'],'dataRD');
    d2=load([dirData,'/2019/inputRD'],'dataRD');
    d3=load([dirData,'/2020/inputRD'],'dataRD');

    % These variables are set to NaN for every 2020 row.
    fillWithNan2020={'declineOffer','enrolledInAssigned'};
    for v=1:length(fillWithNan2020)
        d3.dataRD.(fillWithNan2020{v})=nan(height(d3.dataRD),1);
    end

    % These variables are set to zero for every 2018 and 2019 row.
    fillWithZeros2018_2019={'pobWhatsapp','riskWhatsapp','controlWhatsapp'};
    for v=1:length(fillWithZeros2018_2019)
        d1.dataRD.(fillWithZeros2018_2019{v})=zeros(height(d1.dataRD),1);
        d2.dataRD.(fillWithZeros2018_2019{v})=zeros(height(d2.dataRD),1);
    end

    % Flag rows whose grade is one of entryGrades and whose cod_reg is not 13.
    % NOTE(review): presumably cod_reg==13 is one specific region - confirm.
    % The flag is stored as 'comparable'; it is not read again in this file.
    entryGrades=[-1 0 1 7 9];
    sirven1=ismember(d1.dataRD.grade,entryGrades)&not(d1.dataRD.cod_reg==13);
    sirven2=ismember(d2.dataRD.grade,entryGrades)&not(d2.dataRD.cod_reg==13);
    sirven3=ismember(d3.dataRD.grade,entryGrades)&not(d3.dataRD.cod_reg==13);

    d1.dataRD.comparable=sirven1;
    d2.dataRD.comparable=sirven2;
    d3.dataRD.comparable=sirven3;

    % Tag every row with its year.
    d1.dataRD.anho=2018*ones(height(d1.dataRD),1);
    d2.dataRD.anho=2019*ones(height(d2.dataRD),1);
    d3.dataRD.anho=2020*ones(height(d3.dataRD),1);

    % Stack the three years, keeping only variables present in all of them
    % and dropping 'mrun'.
    incommon=intersect(intersect(d1.dataRD.Properties.VariableNames,d2.dataRD.Properties.VariableNames),d3.dataRD.Properties.VariableNames);
    incommon=incommon(not(strcmp(incommon,'mrun')));
    dataRD=[d1.dataRD(:,incommon);d2.dataRD(:,incommon);d3.dataRD(:,incommon)];

else
    % Single-year files follow the same <dirData>/<year>/inputRD layout as
    % the pooled branch, so both path separators are required.
    load([dirData,'/',anhoStr,'/inputRD'],'dataRD')

    % Later steps sort and join on 'anho'; add it when the file lacks it.
    if(not(ismember('anho',dataRD.Properties.VariableNames)))
        dataRD.anho=anho*ones(height(dataRD),1);
    end
end
%%
% dataRD.participate2ndRound=scalarForTable(false,dataRD);
% dataRD.placed2ndRound=scalarForTable(false,dataRD);
% 
% % recover 2nd-round participation and 2nd-round placement
% for a=2018:2020
%     dataPublica=readtable([myDir,'/Mineduc/dataCompilada/1. SAE/_csv/compiladoPostulantes',num2str(a),'.csv'],'delimiter',',');
%     
%     if a==2021
%         dataPublica.id_postulante=dataPublica.id_postulante_2;
%     end
%     if(not(ismember('id_postulante',dataPublica.Properties.VariableNames)))
%         conversion=readtable([myDir,'/Mineduc/dataCompilada/Aux/_csv/conversionMRun',num2str(a),'_1.csv'],'delimiter',',');
%         conversion=conversion(:,{'id_postulante','mrun'});
%         if(anho==2017)
%             conversion.id_postulante=mat2cellstr(conversion.id_persona,'withThousandsSeparator',false);
%         end
%         dataPublica=innerjoin(dataPublica,conversion,'key','mrun');
%     end
%     
%     
%     [estan,pos]=ismember(dataRD.id_postulante,dataPublica.id_postulante);
%     
%    
%     
%     dataRD.participate2ndRound(estan)=dataPublica.participa_2(pos(estan))==1;
%     dataRD.placed2ndRound(estan)=dataPublica.asignadoEnPref_2(pos(estan))==1;
% 
% end


% Applicant ids must not repeat: the joins below key on id_postulante.
assert(allunique(dataRD.id_postulante))

% The 2020 survey is merged only when 2020 is part of the sample
% (pooled run or a 2020-only run).
if(ismember(anho,[0 2020]))
    surveyFile=[projectDirData,'/data/chile/2020/dataEncuestaMail.csv'];
    survey=readtable(surveyFile);

    % Survey sample = progress equal to 100 (as defined on paper).
    survey.inSurvey2020=(survey.progress==100);

    % Left join: every dataRD row is kept and only the flag is brought over.
    dataRD=outerjoin(dataRD,survey,...
        'type','left',...
        'keys',{'id_postulante'},...
        'mergeKeys',true,...
        'rightVariables',{'inSurvey2020'});
end

%% Enrollment, value-added and fee outcomes
% Value added (this is from Chris)

gradeAbove8=dataRD.grade>8;
notEnrolled=isnan(dataRD.rbdEnrolled);

% Recomputed here because the variable is not available for 2020.
% The assignedToPref condition matters: per the original note it removes the
% "matricula asegurada" cases (presumably guaranteed-enrollment - confirm).
enrolledAtPlaced=(dataRD.rbdEnrolled==dataRD.rbdAsign_1)&dataRD.assignedToPref;
dataRD.enrolledInAssigned=enrolledAtPlaced;

dataRD.enrolledInAny=~notEnrolled;

% Value added is blanked above grade 8; the "has VA" indicator is NaN there
% as well, so it is only defined for grades up to 8.
dataRD.valueAddedEnrolled(gradeAbove8)=nan;
hasVA=double(~isnan(dataRD.valueAddedEnrolled));
hasVA(gradeAbove8)=nan;
dataRD.hasValueAddedEnrolled=hasVA;

% Value added split by whether the student enrolled at the placed school.
vaIfPlaced=dataRD.valueAddedEnrolled;
vaIfPlaced(~enrolledAtPlaced)=nan;
dataRD.valueAddedEnrolledIfPlaced=vaIfPlaced;

vaIfNotPlaced=dataRD.valueAddedEnrolled;
vaIfNotPlaced(enrolledAtPlaced)=nan;
dataRD.valueAddedEnrolledIfNotPlaced=vaIfNotPlaced;

% Fee rescaled by 750; the table labels it as USD, so 750 is presumably
% the exchange rate used - confirm.
dataRD.feeEnrolled=dataRD.feeEnrolled/750;
hasFee=double(dataRD.feeEnrolled>0);
hasFee(notEnrolled)=nan;
dataRD.hasFeeEnrolled=hasFee;


%%

% Fixed row order: by year, then applicant id.
dataRD=sortrows(dataRD,{'anho','id_postulante'});
%%

% Fee bins for the first-ranked school of the initial attempt.
% Original note: to check whether the bins look right, look at the supply
% data rather than the copayment database.
% aux/auxCatsFee are not read again in this file.
feeCats1=[0 0 50 100 160];
aux=discretize(dataRD.fee_firstIni,feeCats1,'categorical','IncludedEdge','right');
auxCatsFee=categories(aux);

% Panel letter prefixes.
pL={'A. ','B. ','C. ','D. ','E. ','F. ','G. ','H. ','I. '};

% Table-building state shared by the panel sections below:
%   varsC    - accumulated rows {data, statistic handle, row label}
%   panels   - {number of rows in varsC before the panel, panel title}
%   newCell  - blank 10x3 template used to reset the per-panel buffer v
%   p        - row counter inside v
%   panelPos - index of the current panel
varsC={};
panels=cell(10,2);
newCell=cell(10,3);
v=newCell;
p=0;
panelPos=1;


% See variable "descRadio" for the definition



%% Statistic handles
% Each handle receives a data vector and a logical subgroup mask and returns
% a scalar. Handles that mention dataRD capture the table as it exists when
% the handle is created (MATLAB anonymous-function semantics).
mean_=@(vals,mask)mean(vals(mask));                                 % plain mean
nanmean_=@(vals,mask)mean(vals(mask),'omitnan');                    % mean ignoring NaN
quantCond_=@(vals,prob,mask)quantile(vals(mask&(vals>0.01)),prob);  % quantile among values above 0.01
sum_=@(vals,mask)sum(vals(mask));                                   % sum
meanAddAny=@(vals,mask)mean(vals(dataRD.addAny&mask));              % mean restricted to dataRD.addAny rows
meanWPlacement_=@(vals,mask)mean(vals(mask&(dataRD.anho<2020)));    % mean restricted to years before 2020

%% General info rows (no panel header) followed by the Demographics panel
% Sample size and sample share; the share column sums 1/N over the subgroup.
nObs=height(dataRD);
rows={ones(nObs,1),sum_,'N'; ...
      ones(nObs,1)/nObs,sum_,'%'};
varsC=[varsC;rows(~cellfun(@isempty,rows(:,1)),:)];

% Demographics panel: registered with the number of rows already in varsC.
panels(panelPos,:)={size(varsC,1),[pL{panelPos},'Demographics']};
rows={dataRD.esSep,mean_,'Economically Vulnerable'; ...
      dataRD.newMarket<0,mean_,'Rural'};
varsC=[varsC;rows(~cellfun(@isempty,rows(:,1)),:)];
% Rows currently disabled:
%   {dataRD.female,mean_,'Female'}
%   {dataRD.alto_rendimiento_1,mean_,'High performance'}
%   {dataRD.reliableGeo,mean_,'Reliable geocoding'}

% Leave the shared state ready for the next panel.
v=newCell;
p=0;
panelPos=panelPos+1;




%% Panel Application Behavior
% Registered with the number of rows already in varsC.
panels(panelPos,:)={size(varsC,1),[pL{panelPos},'Application behavior']};

% All rows are subgroup means.
rows={dataRD.lengthIni,mean_,'Length initial attempt'; ...
      dataRD.lengthEnd,mean_,'Length final attempt'; ...
      dataRD.totalAttempts,mean_,'Total attempts'; ...
      dataRD.changeAnyIniEnd,mean_,'Any modification'; ...
      dataRD.addAnyIniEnd,mean_,'Add any'};
varsC=[varsC;rows(~cellfun(@isempty,rows(:,1)),:)];
% Rows currently disabled:
%   {dataRD.totalDays,mean_,'Nº of different days'}
%   {dataRD.changeAnyTop3IniEnd,mean_,'Change any top 3'}
%   {dataRD.addAsLastIniEnd,mean_,'Add as last'}
%   {dataRD.addInBetweenIniEnd,mean_,'Add to middle'}
%   {dataRD.addAsFirstIniEnd,mean_,'Add as first'}
%   {dataRD.changeOrigOrderIniEnd,mean_,'Change order'}
%   {dataRD.changeTop1IniEnd,mean_,'Change top 1'}
%   {dataRD.deleteAnyIniEnd,mean_,'Delete any'}
%   {dataRD.deleteAllIniEnd,mean_,'Delete all'}

% Leave the shared state ready for the next panel.
v=newCell;
p=0;
panelPos=panelPos+1;



%% Panel placement
% Registered with the number of rows already in varsC.
panels(panelPos,:)={size(varsC,1),[pL{panelPos},'Placement']};

% NOTE(review): the only code in this file that builds participate2ndRound
% and placed2ndRound is commented out near the top; presumably both columns
% now come with inputRD - confirm.
rows={dataRD.assignedToPref,mean_,'Placed in pref.'; ...
      dataRD.preferenciaAsign_1==1,mean_,'Placed 1st'; ...
      dataRD.participate2ndRound,mean_,'Particip. in 2nd round'; ...
      dataRD.placed2ndRound,mean_,'Placed in 2nd round'};
varsC=[varsC;rows(~cellfun(@isempty,rows(:,1)),:)];
% Rows currently disabled:
%   {dataRD.preferenciaAsign_1==2,mean_,'Placed 2nd'}
%   {dataRD.preferenciaAsign_1==3,mean_,'Placed 3rd'}

% Leave the shared state ready for the next panel.
v=newCell;
p=0;
panelPos=panelPos+1;

% %% Panel cantEEs
% 
% binsCantEEs=[1,10,100,1400];
% assert(max(dataRD.cantEE<1400))
% 
% 
% catsCantEE={'Urban 1 to 10','Urban 11 to 100','Urban > 100'};
% cantEECat=discretize(dataRD.cantEE,binsCantEEs,'categorical',catsCantEE);
% cantEECat = addcats(cantEECat,{'Rural'},'before','Urban 1 to 10');
% cantEECat(dataRD.newMarket<0)='Rural';
% auxCats=categories(cantEECat);
% 
% panels(panelPos,:)={size(varsC,1),[pL{panelPos},'Classificaction by total schools in market']};
% for c=1:length(auxCats)
% p=p+1;v(p,:)={cantEECat==auxCats{c} ,mean_,auxCats{c}};
% end
% 
% varsC=[varsC;v(not(cellfun(@isempty,v(:,1))),:)];
% v=newCell;panelPos=panelPos+1;p=0;



%% Panel spare capacity
% Input comes from spareCapacity.m; per the original note it is a congestion
% outcome from a simulation.
spareFile=[projectDirData,'/data/chile/auxiliar/spareCapacity'];
load(spareFile,'shareAvailable');

% Expose 'nivel' under the key name used in dataRD and keep only rows with a
% positive newMarket.
shareAvailable.grade=shareAvailable.nivel;
positiveMarket=shareAvailable.newMarket>0;
shareAvailable=shareAvailable(positiveMarket,:);

% Left join on market, year and grade, then restore the year/id row order.
dataRD=outerjoin(dataRD,shareAvailable,...
    'type','left',...
    'keys',{'newMarket','anho','grade'},...
    'mergeKeys',true,...
    'rightVariables',{'shareAvailable','shareAvailableFree'});
dataRD=sortrows(dataRD,{'anho','id_postulante'});

% Registered with the number of rows already in varsC.
panels(panelPos,:)={size(varsC,1),[pL{panelPos},'School capacity available after placement (at local market level defined for each student)']};

% NaN-ignoring means: rows without a match in the join carry NaN.
rows={dataRD.shareAvailable,nanmean_,'Share of total seats '; ...
      dataRD.shareAvailableFree,nanmean_,'Share of seats in free schools'};
varsC=[varsC;rows(~cellfun(@isempty,rows(:,1)),:)];

% Leave the shared state ready for the next panel.
v=newCell;
p=0;
panelPos=panelPos+1;




%% Panel Enrolled school
% Registered with the number of rows already in varsC.
panels(panelPos,:)={size(varsC,1),[pL{panelPos},'Attributes of enrolled school']};

% The two enrollment indicators use the plain mean; the remaining rows use
% the NaN-ignoring mean.
rows={dataRD.enrolledInAny,mean_,'Enrolled at some school'; ...
      dataRD.enrolledInAssigned,mean_,'Enrolled at placed'; ...
      dataRD.hasValueAddedEnrolled,nanmean_,'Have value added measure|grade<=8'; ...
      dataRD.valueAddedEnrolledIfPlaced,nanmean_,'Value added|enrolled at placed'; ...
      dataRD.valueAddedEnrolledIfNotPlaced,nanmean_,'Value added|not enrolled at placed'; ...
      dataRD.feeEnrolled,nanmean_,'School monthly fee (USD)'; ...
      dataRD.meanSepEnrolled,nanmean_,'Share of vulnerable students'};
varsC=[varsC;rows(~cellfun(@isempty,rows(:,1)),:)];
% Rows currently disabled:
%   {dataRD.valueAddedEnrolled,nanmean_,'Value added'}
%   {dataRD.sizePerNivelEnrolled,nanmean_,'Total enrollment per grade'}

% Leave the shared state ready for the next panel.
v=newCell;
p=0;
panelPos=panelPos+1;

%% Panel Risk
% Disabled categorical version:
% riskIniCat=discretize(dataRD.riskLastDayIni,[0 0 .3 .7 .9 1],'categorical','IncludedEdge','right');
% auxCats=categories(riskIniCat);

% Registered with the number of rows already in varsC.
panels(panelPos,:)={size(varsC,1),[pL{panelPos},'Classification by true risk of initial attempt']};

% "Zero risk" is risk at or below 0.01; "Risky" is risk above 0.3.
rows={dataRD.riskLastDayIni,mean_,'Mean risk'; ...
      dataRD.riskLastDayIni<=0.01,mean_,'Zero risk'; ...
      dataRD.riskLastDayIni>.3,mean_,'Risky (risk>.3)'};
varsC=[varsC;rows(~cellfun(@isempty,rows(:,1)),:)];
% Rows currently disabled:
%   {dataRD.riskLastDayIni,@(x,subg)quantCond_(x,.25,subg),'.25 quantile |>0'}
%   {dataRD.riskLastDayIni,@(x,subg)quantCond_(x,.50,subg),'.50 quantile |>0'}
%   {dataRD.riskLastDayIni,@(x,subg)quantCond_(x,.75,subg),'.75 quantile |>0'}

% Leave the shared state ready for the next panel.
v=newCell;
p=0;
panelPos=panelPos+1;

%% Evaluate every row statistic on every subgroup
% Drop the pre-allocated panel slots that were never filled.
usedPanel=~cellfun(@isempty,panels(:,1));
panels=panels(usedPanel,:);

% Define subgroups: column 1 is a logical row mask over dataRD, column 2 the
% three-line column header.
popupEligible=dataRD.pobPopup==1;
subgroups=cell(0,2);
subgroups(end+1,:)={true(height(dataRD),1),{'All';'';''}};
subgroups(end+1,:)={dataRD.esSep==1,{'Economically';'Vulnerable';''}};
subgroups(end+1,:)={dataRD.esSep==0,{'Not';'Economically';'Vulnerable'}};
subgroups(end+1,:)={popupEligible,{'Pop-up';'eligible';''}};
subgroups(end+1,:)={(dataRD.riskPopup>.3)&popupEligible,{'Risky';'(predicted';'risk>.3)'}};
% Subgroups currently disabled:
%   dataRD.pobPopup==1&dataRD.riskPopup<.5&dataRD.riskPopup>.1,{'Around';'Pop-up';'Cutoff'}
%   dataRD.pobWhatsapp==1&dataRD.riskWhatsapp>0.3&dataRD.grade<2,{'RCT.';'sample';'(2020)'}
%   dataRD.inSurvey2020,{'Survey';'sample';'(2020)'}

% preTable(row,col) = statistic of row variable 'row' within subgroup 'col'.
I=size(varsC,1);
J=size(subgroups,1);
preTable=nan(I,J);

for col=1:J
    mask=subgroups{col,1};
    for row=1:I
        statFun=varsC{row,2};
        preTable(row,col)=statFun(varsC{row,1},mask);
    end
end

%%
% Options struct passed to cell2latex further down.
opts=struct;

% First sentence of the table note: sample size and the rounded percentage
% of rows coming from each year.
pctByYear=arrayfun(@(yr)round(100*mean(dataRD.anho==yr)),[2018 2019 2020]);
nnote=sprintf('N: %s (%i%% from 2018, %i%% from 2019 and %i%% from 2020). ',...
    mat2cellstr(height(dataRD),'rc',1),pctByYear(1),pctByYear(2),pctByYear(3));
%sethNote='All statistics are means in the population defined by the column header. ``Pop-up eligible'''' (col. 4) are students who submitted applications that received a risk prediction. ``Risky'''' (col. 5) is applicants whose first attempt had a predicted risks $>$ 0.3. ``Around pop-up cutoff'''' (col. 6) are applicants whose first attempt had a predicted risk in $[$0.1,0.5$]$.  ``RCT sample'''' (column 7) is applicants in treatment or control group of the 2020 RCT design. ``Survey sample'''' (column 8) is applicants who completed the 2020 school choice survey. Selected row variable definitions are as follows. ``Economically vulnerable'''' is an SES measure computed by Mineduc. ``Rural'''' is an indicator if students live in rural areas. ``Length of initial/final attempt'''' is the  number of schools on an applicants first and final choice application. ``Total attempts'''' is the number of times an applicant submitted an application to the centralized system. Application change and addition variables describe the share of applicants making different kinds of changes applicants make between their first and final submission. ``Placed in pref/1st/2nd/3rd'''' are indicators for any placement or for placement in the listed rank. ``2nd round'''' variables describe participation and placement outcomes in the second centralized placement round. ``Share of total seats/seats in free schools'''' is the share of seats in all schools/in schools without fees unfilled after the first application round in a student''s local market. Value added and school characteristic variables described in Online Appendix \ref{app:data}. VA is calculated only for grades 8 and below. True risk of initial attempt variables describe the nonplacement risk of an applicant''s initial application, evaluated using ex post observed applications.';
% Body of the table note (LaTeX source; quotes are doubled for MATLAB).
sethNote='All statistics are means in the population defined by the column header. ``Pop-up eligible'''' (col. 4) are students who submitted applications that received a risk prediction. ``Risky'''' (col. 5) is applicants whose first attempt had a predicted risks $>$ 0.3. Selected row variable definitions are as follows. ``Economically vulnerable'''' is an SES measure computed by Mineduc. ``Rural'''' is an indicator if students live in rural areas. ``Length of initial/final attempt'''' is the  number of schools on an applicants first and final choice application. ``Total attempts'''' is the number of times an applicant submitted an application to the centralized system. Application change and addition variables describe the share of applicants making different kinds of changes applicants make between their first and final submission. ``Placed in pref/1st'''' are indicators for any placement or for placement in the school ranked 1st. ``2nd round'''' variables describe participation and placement outcomes in the second centralized placement round. ``Share of total seats/seats in free schools'''' is the share of seats in all schools/in schools without fees unfilled after the first application round in a student''s local market. Value added and school characteristic variables described in Online Appendix \ref{app:data}. VA is calculated only for grades 8 and below. True risk of initial attempt variables describe the nonplacement risk of an applicant''s initial application, evaluated using ex post observed applications.';

% Full note = sample-size sentence followed by the definitions above.
opts.note=[nnote,sethNote];
%'``High Performance'''' are students that come from the 20%% of their class, and does not apply to applicants to PK.'

% Label and output file carry the year suffix (empty when pooling).
% Both are built by concatenation rather than sprintf: sprintf skips empty
% arguments, so with anhoStr='' the text after the last '%s' (the '.tex'
% extension) would be dropped from the file name.
opts.label=['tabDescriptiveStats',anhoStr];
opts.file=[dirTable,'/descStats',anhoStr,'.tex'];

% Remaining layout options are consumed by the project helper cell2latex;
% see that function for their exact meaning.
opts.title='Descriptive Statistics for Chilean Choice Applicants';
opts.adjust=true;
opts.addnumbers=true;
opts.mergeHeader=false;
opts.firstColumn=varsC(:,3);        % row labels
opts.header=[subgroups{:,2}];       % 3-by-J cell of column header lines
opts.panel=panels;
opts.positionParameter='H';
opts.sizeFootnoteFloat='\scriptsize';
opts.verticalAdjustParam=15;

% Format the numeric table as text and write the LaTeX table.
tabla=cell2latex(mat2cellstr(preTable,'revisarFils',true),'opts',opts);

if(compileLatexTable)
    compileLatex(tabla)
end


% ====== ====== ====== ====== ====== ====== ====== ====== ======
%% Compare risky to non-risky:
% Optional second table; it is only built when the flag below is true.
tableRiskyVsNonRisky=false;
if(tableRiskyVsNonRisky)

    % Reset the table-building state used by the main table.
    varsC={};
    panels=cell(10,2);
    newCell=cell(10,3);
    v=newCell;
    p=0;
    panelPos=1;

    %% Funs:
    % Same (data, mask) convention as the main table; here the conditional
    % quantile keeps values strictly above 0.
    mean_=@(vals,mask)mean(vals(mask));
    quantCond_=@(vals,prob,mask)quantile(vals(mask&(vals>0)),prob);
    sum_=@(vals,mask)sum(vals(mask));

    %% Panel Regular
    % getDigit is a project helper; see it for how the digits are numbered.
    rows={dataRD.esSep,mean_,'Vulnerable'; ...
          dataRD.female,mean_,'Female'; ...
          getDigit(dataRD.newMarket,1)==1&getDigit(dataRD.newMarket,2)==0,mean_,'Applying in regional capital'; ...
          dataRD.fee_firstIni>0,mean_,'Fee-school as first option'; ...
          dataRD.lengthIni,mean_,'Length of initial attempt'; ...
          dataRD.voluntary,mean_,'Voluntary'; ...
          ones(height(dataRD),1),sum_,'N'};
    varsC=[varsC;rows(~cellfun(@isempty,rows(:,1)),:)];

    v=newCell;
    p=0;
    panelPos=panelPos+1;

    %% PRINT

    % Define subgroups: logical row mask plus a two-line column header.
    inPopup=dataRD.pobPopup==1;
    subgroups=cell(0,2);
    subgroups(end+1,:)={true(height(dataRD),1),{'All';''}};
    subgroups(end+1,:)={inPopup&(dataRD.riskPopup<=.3),{'Pop-up population';'Not risky'}};
    subgroups(end+1,:)={inPopup&(dataRD.riskPopup>.3),{'Pop-up population';'Risky'}};

    % preTable(row,col) = statistic of row variable 'row' within subgroup 'col'.
    I=size(varsC,1);
    J=size(subgroups,1);
    preTable=nan(I,J);

    for col=1:J
        mask=subgroups{col,1};
        for row=1:I
            statFun=varsC{row,2};
            preTable(row,col)=statFun(varsC{row,1},mask);
        end
    end

    % Options for the project helper cell2latex.
    % NOTE(review): the file name uses the numeric anho (so a pooled run ends
    % in "0"), unlike the main table, which uses anhoStr - confirm intended.
    opts=struct;
    opts.note={'Risky = P(no placement)>.3','Pop-up population: Students that applied while the web service was active',...
        'Voluntary: students whose current school offer the next grade'};
    opts.label='tabDescRiskyNonRisky';
    opts.file=sprintf('%s/descRiskyNonRisky%i.tex',dirTable,anho);
    opts.title='Descriptive Statistics of Risky and Non-Risky Population';
    opts.adjust=true;
    %opts.addnumbers=true;
    %opts.mergeHeader=false;
    opts.firstColumn=varsC(:,3);
    opts.header=[subgroups{:,2}];
    opts.positionParameter='H';

    tablaR=cell2latex(mat2cellstr(preTable,'revisarFils',true),'opts',opts);
    if(compileLatexTable)
        compileLatex(tablaR);
    end

end

